---
# KubeRay RayCluster manifest defining the `rayllm` cluster: one head pod
# plus the worker groups listed under `spec`.
# NOTE(review): `ray.io/v1alpha1` is the older RayCluster API version; confirm
# which versions the installed KubeRay operator serves before changing it.
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  name: rayllm
spec:
  # Head group: `ray start` parameters and the pod template for the Ray head pod.
  headGroupSpec:
    # `rayStartParams` configures the `ray start` command run in the head pod.
    # KubeRay defaults for `rayStartParams`:
    #   https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md
    # All options accepted by `ray start`:
    #   https://docs.ray.io/en/latest/cluster/cli.html#ray-start
    rayStartParams:
      # Custom Ray resources advertised by the head node (JSON wrapped in an
      # extra pair of double quotes; keep the escaping exactly as written).
      resources: '"{\"accelerator_type_cpu\": 2}"'
      dashboard-host: '0.0.0.0'
    # Pod template for the head pod.
    template:
      spec:
        containers:
        - name: ray-head
          # NOTE(review): `latest` is a mutable tag; consider pinning a version.
          image: anyscale/ray-llm:latest
          resources:
            requests:
              cpu: "2"
              memory: "8Gi"
            limits:
              cpu: "2"
              memory: "8Gi"
          ports:
          - name: gcs-server
            containerPort: 6379
          # Ray dashboard
          - name: dashboard
            containerPort: 8265
          - name: client
            containerPort: 10001
  workerGroupSpecs:
  # One worker group. `replicas` is the desired number of worker pods in the
  # group; `minReplicas` and `maxReplicas` are the lower and upper bounds on
  # that count.
  - replicas: 1
    minReplicas: 0
    maxReplicas: 1
    # Logical name of this worker group; here it names the GPU workers.
    groupName: gpu-group
    # Custom Ray resources advertised by each worker in this group (JSON
    # wrapped in an extra pair of double quotes; keep the escaping as written).
    rayStartParams:
      resources: '"{\"accelerator_type_cpu\": 48, \"accelerator_type_a10\": 2, \"accelerator_type_a100_80g\": 2}"'
    # Pod template for the worker pods.
    template:
      spec:
        containers:
        - name: llm
          # NOTE(review): `latest` is a mutable tag; consider pinning a version.
          image: anyscale/ray-llm:latest
          # Run `ray stop` before the container is terminated.
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh", "-c", "ray stop"]
          resources:
            limits:
              cpu: "48"
              memory: "192G"
              nvidia.com/gpu: 4
            requests:
              cpu: "36"
              memory: "128G"
              nvidia.com/gpu: 4
        # This toleration matches the taint `ray.io/node-type=worker:NoSchedule`.
        # Add that taint to the GPU nodes; the toleration is what allows these
        # worker pods to be scheduled onto them.
        tolerations:
        - key: "ray.io/node-type"
          operator: "Equal"
          value: "worker"
          effect: "NoSchedule"
